library(haven)
library(ggplot2)
library(tidyr)
library(dplyr)
library(stringr)
library(readxl)

# ---- Input data ----
# Restore the objects saved in the UCDP .RData file into the workspace.
# `ucdp.brd`, which is used further down, is presumably one of them (it is
# not created anywhere else in the visible part of this script).
load(file = "data/ucdp-brd-conf-50-2016.RData")

# Older PRIO battle-deaths spreadsheet, read as a data frame.
brd.old <- read_excel(path = "data/PRIO Battle Deaths Dataset 3.1.xls")

# Clean up the PRIO table in one pass:
#  * bdeadbes: a value of -999 marks a missing best estimate; replace it with
#    the midpoint of the low and high estimates.
#  * gwnoloc2: the location code as text; where gwnoloc is missing (often
#    interstate wars) fall back to the codes of side A and side B, separated
#    by a single space.
brd.old <- brd.old %>%
  mutate(
    bdeadbes = if_else(bdeadbes == -999,
                       (bdeadlow + bdeadhig) / 2,
                       bdeadbes),
    gwnoloc2 = if_else(is.na(gwnoloc),
                       paste(gwnoa, gwnob),
                       as.character(gwnoloc))
  )


# ---- Pre-1989 battle deaths per country and year (PRIO data) ----
# Each row of brd.old can list several country codes in gwnoloc2. The row is
# expanded to one row per code, and its deaths are shared equally between the
# listed codes (BdBest / numGWNO) before summing per country and year.

# Split on any run of commas and/or spaces. A plain single-space split would
# leave a trailing comma glued to a code (e.g. "200,") whenever a cell uses
# the ", " separator that the UCDP data further down uses.
# NOTE(review): confirm against the spreadsheet which separator gwnoa, gwnob
# and gwnoloc actually use; for space-only cells this split is unchanged.
s <- str_split(brd.old$gwnoloc2, "[, ]+")

# Number of codes per source row. lengths() always returns an integer vector
# (sapply(s, length) returns a list for empty input) and is computed once.
n.codes <- lengths(s)

ucdp <- data.frame(Year = rep(brd.old$year, n.codes),
                   BdBest = rep(brd.old$bdeadbes, n.codes),
                   numGWNO = rep(n.codes, n.codes),
                   GWNoBattle = unlist(s),
                   # keep the codes as character on R < 4.0 as well
                   stringsAsFactors = FALSE)

# NOTE(review): summarise() only peels off the last grouping variable, so
# ucdp.old is still grouped by GWNoBattle afterwards (as it was before this
# rewrite); the grouping is left untouched on purpose.
ucdp.old <- ucdp %>%
  group_by(GWNoBattle, Year) %>%
  summarise(BdBest = sum(BdBest / numGWNO)) %>%
  # keep only the years before 1989 (rows with a missing Year are dropped)
  filter(Year < 1989)


# ---- Battle deaths per country and year (ucdp.brd) ----
# Same expansion as for the PRIO data: ucdp.brd$GWNoBattle can hold several
# codes separated by ", "; each row's BdBest is shared equally between them.
s <- str_split(ucdp.brd$GWNoBattle, ", ")

# Number of codes per source row. lengths() always returns an integer vector
# (sapply(s, length) returns a list for empty input) and is computed once
# instead of three times.
n.codes <- lengths(s)

ucdp.new <- data.frame(Year = rep(ucdp.brd$Year, n.codes),
                       BdBest = rep(ucdp.brd$BdBest, n.codes),
                       numGWNO = rep(n.codes, n.codes),
                       GWNoBattle = unlist(s),
                       # keep the codes as character on R < 4.0 as well
                       stringsAsFactors = FALSE)

# Sum the per-country shares within each country-year.
# NOTE(review): summarise() only peels off the last grouping variable, so
# ucdp.new is still grouped by GWNoBattle afterwards (unchanged behaviour).
ucdp.new <- ucdp.new %>%
  group_by(GWNoBattle, Year) %>%
  summarise(BdBest = sum(BdBest / numGWNO))

# Stack the two country-year tables (columns GWNoBattle, Year, BdBest) into
# one. ucdp.old was restricted to Year < 1989 above; ucdp.new is used as is.
# NOTE(review): presumably ucdp.brd only starts in 1989, so the two parts do
# not overlap -- confirm, otherwise early years would appear twice.
# NOTE(review): both inputs come straight from group_by() + summarise(), so
# the result may still be grouped by GWNoBattle; ungroup() before any
# whole-table computation if that is not wanted.
ucdpyearly <- rbind(ucdp.old, ucdp.new)